In [1]:
#imports
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots

import folium

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline 

import math
import random
from datetime import timedelta

import warnings
warnings.filterwarnings('ignore')

#colour pallette
cnf ='#393e46'
dth = '#ff2e63'
rec = '#21bf73'
act = '#fe9801'
In [2]:
#DATASET PREPARATION
In [3]:
import plotly as py
py.offline.init_notebook_mode(connected = True)
In [4]:
df = pd.read_csv('covid_19_data_cleaned.csv',parse_dates=['Date'])
In [5]:
df
Out[5]:
Date Province/State Country Lat Long Confirmed Recovered Deaths Active
0 2020-01-22 NaN Afghanistan 33.93911 67.709953 0 0 0 0
1 2020-01-23 NaN Afghanistan 33.93911 67.709953 0 0 0 0
2 2020-01-24 NaN Afghanistan 33.93911 67.709953 0 0 0 0
3 2020-01-25 NaN Afghanistan 33.93911 67.709953 0 0 0 0
4 2020-01-26 NaN Afghanistan 33.93911 67.709953 0 0 0 0
... ... ... ... ... ... ... ... ... ...
136280 2021-05-16 NaN Timor-Leste -8.87420 125.727500 0 2336 0 -2336
136281 2021-05-17 NaN Timor-Leste -8.87420 125.727500 0 2406 0 -2406
136282 2021-05-18 NaN Timor-Leste -8.87420 125.727500 0 2517 0 -2517
136283 2021-05-19 NaN Timor-Leste -8.87420 125.727500 0 2636 0 -2636
136284 2021-05-20 NaN Timor-Leste -8.87420 125.727500 0 2716 0 -2716

136285 rows × 9 columns

In [6]:
df['Province/State'] = df['Province/State'].fillna("")
df
Out[6]:
Date Province/State Country Lat Long Confirmed Recovered Deaths Active
0 2020-01-22 Afghanistan 33.93911 67.709953 0 0 0 0
1 2020-01-23 Afghanistan 33.93911 67.709953 0 0 0 0
2 2020-01-24 Afghanistan 33.93911 67.709953 0 0 0 0
3 2020-01-25 Afghanistan 33.93911 67.709953 0 0 0 0
4 2020-01-26 Afghanistan 33.93911 67.709953 0 0 0 0
... ... ... ... ... ... ... ... ... ...
136280 2021-05-16 Timor-Leste -8.87420 125.727500 0 2336 0 -2336
136281 2021-05-17 Timor-Leste -8.87420 125.727500 0 2406 0 -2406
136282 2021-05-18 Timor-Leste -8.87420 125.727500 0 2517 0 -2517
136283 2021-05-19 Timor-Leste -8.87420 125.727500 0 2636 0 -2636
136284 2021-05-20 Timor-Leste -8.87420 125.727500 0 2716 0 -2716

136285 rows × 9 columns

In [7]:
df = pd.read_csv('covid_19_data_cleaned.csv',parse_dates=['Date'])

country_daywise = pd.read_csv('country_daywise.csv',parse_dates=['Date'])
countrywise = pd.read_csv('countrywise.csv')
daywise = pd.read_csv('daywise.csv',parse_dates=['Date'])
In [8]:
df['Province/State'] = df['Province/State'].fillna("")
df.head()
Out[8]:
Date Province/State Country Lat Long Confirmed Recovered Deaths Active
0 2020-01-22 Afghanistan 33.93911 67.709953 0 0 0 0
1 2020-01-23 Afghanistan 33.93911 67.709953 0 0 0 0
2 2020-01-24 Afghanistan 33.93911 67.709953 0 0 0 0
3 2020-01-25 Afghanistan 33.93911 67.709953 0 0 0 0
4 2020-01-26 Afghanistan 33.93911 67.709953 0 0 0 0
In [9]:
confirmed = df.groupby('Date').sum()['Confirmed'].reset_index()
recovered= df.groupby('Date').sum()['Recovered'].reset_index()
deaths= df.groupby('Date').sum()['Deaths'].reset_index()
deaths
Out[9]:
Date Deaths
0 2020-01-22 17
1 2020-01-23 18
2 2020-01-24 26
3 2020-01-25 42
4 2020-01-26 56
... ... ...
480 2021-05-16 3379449
481 2021-05-17 3390089
482 2021-05-18 3404338
483 2021-05-19 3417682
484 2021-05-20 3430326

485 rows × 2 columns

In [10]:
df.isnull().sum()
Out[10]:
Date              0
Province/State    0
Country           0
Lat               0
Long              0
Confirmed         0
Recovered         0
Deaths            0
Active            0
dtype: int64
In [11]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 136285 entries, 0 to 136284
Data columns (total 9 columns):
 #   Column          Non-Null Count   Dtype         
---  ------          --------------   -----         
 0   Date            136285 non-null  datetime64[ns]
 1   Province/State  136285 non-null  object        
 2   Country         136285 non-null  object        
 3   Lat             136285 non-null  float64       
 4   Long            136285 non-null  float64       
 5   Confirmed       136285 non-null  int64         
 6   Recovered       136285 non-null  int64         
 7   Deaths          136285 non-null  int64         
 8   Active          136285 non-null  int64         
dtypes: datetime64[ns](1), float64(2), int64(4), object(2)
memory usage: 9.4+ MB
In [12]:
df.query('Country == "India"')
Out[12]:
Date Province/State Country Lat Long Confirmed Recovered Deaths Active
71295 2020-01-22 India 20.593684 78.96288 0 0 0 0
71296 2020-01-23 India 20.593684 78.96288 0 0 0 0
71297 2020-01-24 India 20.593684 78.96288 0 0 0 0
71298 2020-01-25 India 20.593684 78.96288 0 0 0 0
71299 2020-01-26 India 20.593684 78.96288 0 0 0 0
... ... ... ... ... ... ... ... ... ...
71775 2021-05-16 India 20.593684 78.96288 24965463 21174076 274390 3516997
71776 2021-05-17 India 20.593684 78.96288 25228996 21596512 278719 3353765
71777 2021-05-18 India 20.593684 78.96288 25496330 21986363 283248 3226719
71778 2021-05-19 India 20.593684 78.96288 25772440 22355440 287122 3129878
71779 2021-05-20 India 20.593684 78.96288 26031991 22712735 291331 3027925

485 rows × 9 columns

In [13]:
#WORLDWIDE Total Confirmed,Recovered, and Deaths
In [14]:
confirmed.tail()
Out[14]:
Date Confirmed
480 2021-05-16 163069932
481 2021-05-17 163609626
482 2021-05-18 164231810
483 2021-05-19 164902902
484 2021-05-20 165531431
In [15]:
recovered.tail()
Out[15]:
Date Recovered
480 2021-05-16 99064469
481 2021-05-17 99808931
482 2021-05-18 100552044
483 2021-05-19 101250071
484 2021-05-20 101821428
In [16]:
deaths.tail()
Out[16]:
Date Deaths
480 2021-05-16 3379449
481 2021-05-17 3390089
482 2021-05-18 3404338
483 2021-05-19 3417682
484 2021-05-20 3430326
In [17]:
#Scatterplot for Confirmed, Recovered and Deaths 
fig = go.Figure()
fig.add_trace(go.Scatter(x =confirmed['Date'],y= confirmed['Confirmed'], mode = 'lines+markers',name='Confirmed',line = dict(color = "Black")))
fig.add_trace(go.Scatter(x =recovered['Date'],y= recovered['Recovered'], mode = 'lines+markers',name='Recovered',line = dict(color = "green")))
fig.add_trace(go.Scatter(x =deaths['Date'],y= deaths['Deaths'], mode = 'lines+markers',name='Deaths',line = dict(color = "Red")))
fig.update_layout(title = 'worldwide Covid-19', xaxis_tickfont_size = 14, yaxis = dict(title = 'Number of Cases'))

fig.show()
In [18]:
# CASES Density Animation ON WORLD MAP
In [19]:
df.info
Out[19]:
<bound method DataFrame.info of              Date Province/State      Country       Lat        Long  \
0      2020-01-22                 Afghanistan  33.93911   67.709953   
1      2020-01-23                 Afghanistan  33.93911   67.709953   
2      2020-01-24                 Afghanistan  33.93911   67.709953   
3      2020-01-25                 Afghanistan  33.93911   67.709953   
4      2020-01-26                 Afghanistan  33.93911   67.709953   
...           ...            ...          ...       ...         ...   
136280 2021-05-16                 Timor-Leste  -8.87420  125.727500   
136281 2021-05-17                 Timor-Leste  -8.87420  125.727500   
136282 2021-05-18                 Timor-Leste  -8.87420  125.727500   
136283 2021-05-19                 Timor-Leste  -8.87420  125.727500   
136284 2021-05-20                 Timor-Leste  -8.87420  125.727500   

        Confirmed  Recovered  Deaths  Active  
0               0          0       0       0  
1               0          0       0       0  
2               0          0       0       0  
3               0          0       0       0  
4               0          0       0       0  
...           ...        ...     ...     ...  
136280          0       2336       0   -2336  
136281          0       2406       0   -2406  
136282          0       2517       0   -2517  
136283          0       2636       0   -2636  
136284          0       2716       0   -2716  

[136285 rows x 9 columns]>
In [20]:
df['Date'] = df['Date'].astype(str)
In [21]:
df.info
Out[21]:
<bound method DataFrame.info of               Date Province/State      Country       Lat        Long  \
0       2020-01-22                 Afghanistan  33.93911   67.709953   
1       2020-01-23                 Afghanistan  33.93911   67.709953   
2       2020-01-24                 Afghanistan  33.93911   67.709953   
3       2020-01-25                 Afghanistan  33.93911   67.709953   
4       2020-01-26                 Afghanistan  33.93911   67.709953   
...            ...            ...          ...       ...         ...   
136280  2021-05-16                 Timor-Leste  -8.87420  125.727500   
136281  2021-05-17                 Timor-Leste  -8.87420  125.727500   
136282  2021-05-18                 Timor-Leste  -8.87420  125.727500   
136283  2021-05-19                 Timor-Leste  -8.87420  125.727500   
136284  2021-05-20                 Timor-Leste  -8.87420  125.727500   

        Confirmed  Recovered  Deaths  Active  
0               0          0       0       0  
1               0          0       0       0  
2               0          0       0       0  
3               0          0       0       0  
4               0          0       0       0  
...           ...        ...     ...     ...  
136280          0       2336       0   -2336  
136281          0       2406       0   -2406  
136282          0       2517       0   -2517  
136283          0       2636       0   -2636  
136284          0       2716       0   -2716  

[136285 rows x 9 columns]>
In [22]:
fig = px.density_mapbox(df, lat = 'Lat', lon = 'Long',hover_name = 'Country', hover_data=['Confirmed','Recovered','Deaths'],animation_frame = 'Date', color_continuous_scale='Portland', radius =7 ,zoom = 0,height = 700)
fig.update_layout(title = 'worldwide Covid-19 Cases with Time laps')
fig.update_layout(mapbox_style= 'open-street-map',mapbox_center_lon = 0 )

fig.show()
In [100]:
#CASES Over the Time with Area Plot
In [23]:
temp = df.groupby('Date')['Confirmed','Deaths','Recovered','Active'].sum().reset_index()
temp =temp[temp['Date']==max(temp['Date'])].reset_index(drop = True)
tm = temp.melt(id_vars = 'Date', value_vars = ['Active','Deaths','Recovered'])
fig = px.treemap(tm,path = ['variable'],values ='value',height=250,width = 800,color_discrete_sequence=[act,rec,dth])

fig.data[0].textinfo = 'label+text+value'
fig.show()
In [24]:
temp = df.groupby('Date')['Recovered','Deaths','Active'].sum().reset_index()
temp = temp.melt(id_vars = 'Date', value_vars = ['Recovered','Deaths','Active'], var_name ='Case',value_name = 'Count')

fig = px.area(temp, x ='Date',y = 'Count', color = 'Case', height =600, title = 'Cases over time',color_discrete_sequence=[rec,dth,act])
fig.update_layout(xaxis_rangeslider_visible=True)
fig.show()
In [ ]:
 
In [ ]:
 
In [ ]: